# Import Packages
import pandas as pd
import numpy as np
from pandas_profiling import ProfileReport
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score
# Load the train and test CSVs.
# The test file is parsed only once: df_test2 is a deep copy of df_test
# (DataFrame.copy defaults to deep=True), so it is an independent frame
# holding the same contents a second pd.read_csv call would return, without
# repeating the disk read and parse.
df_train = pd.read_csv("2022_train.csv")
df_test = pd.read_csv("2022_test.csv")
df_test2 = df_test.copy()
# Preview the first rows of the training data (last expression of the cell).
df_train.head()
| | Id | GP | MIN | PTS | FGM | FGA | FG% | 3P Made | 3PA | 3P% | ... | FTA | FT% | OREB | DREB | REB | AST | STL | BLK | TOV | TARGET_5Yrs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3799 | 80 | 24.3 | 7.8 | 3.0 | 6.4 | 45.7 | 0.1 | 0.3 | 22.6 | ... | 2.9 | 72.1 | 2.2 | 2.0 | 3.8 | 3.2 | 1.1 | 0.2 | 1.6 | 1 |
| 1 | 3800 | 75 | 21.8 | 10.5 | 4.2 | 7.9 | 55.1 | -0.3 | -1.0 | 34.9 | ... | 3.6 | 67.8 | 3.6 | 3.7 | 6.6 | 0.7 | 0.5 | 0.6 | 1.4 | 1 |
| 2 | 3801 | 85 | 19.1 | 4.5 | 1.9 | 4.5 | 42.8 | 0.4 | 1.2 | 34.3 | ... | 0.6 | 75.7 | 0.6 | 1.8 | 2.4 | 0.8 | 0.4 | 0.2 | 0.6 | 1 |
| 3 | 3802 | 63 | 19.1 | 8.2 | 3.5 | 6.7 | 52.5 | 0.3 | 0.8 | 23.7 | ... | 1.5 | 66.9 | 0.8 | 2.0 | 3.0 | 1.8 | 0.4 | 0.1 | 1.9 | 1 |
| 4 | 3803 | 63 | 17.8 | 3.7 | 1.7 | 3.4 | 50.8 | 0.5 | 1.4 | 13.7 | ... | 0.5 | 54.0 | 2.4 | 2.7 | 4.9 | 0.4 | 0.4 | 0.6 | 0.7 | 1 |
5 rows × 21 columns
# Preview the first five rows of the test data.
df_test.head(n=5)
| | Id | GP | MIN | PTS | FGM | FGA | FG% | 3P Made | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | STL | BLK | TOV |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 56 | 9.1 | 4.0 | 1.6 | 3.7 | 43.7 | 0.1 | 0.3 | 7.3 | 0.7 | 1.2 | 63.4 | 1.2 | 0.8 | 1.7 | 0.4 | 0.2 | 0.3 | 0.8 |
| 1 | 1 | 43 | 19.3 | 10.1 | 3.7 | 8.1 | 46.0 | 0.6 | 1.7 | 35.1 | 1.8 | 2.5 | 75.3 | 0.5 | 0.9 | 1.5 | 3.5 | 0.6 | 0.0 | 1.8 |
| 2 | 2 | 82 | 33.9 | 11.3 | 4.9 | 10.6 | 45.6 | 0.5 | 1.9 | 44.8 | 1.8 | 2.7 | 71.2 | 1.3 | 3.3 | 4.5 | 2.5 | 1.3 | 0.3 | 2.0 |
| 3 | 3 | 86 | 44.7 | 18.8 | 6.8 | 15.9 | 42.9 | 0.5 | 1.8 | 13.5 | 4.5 | 6.3 | 70.9 | 1.5 | 3.2 | 5.0 | 4.1 | 0.9 | 0.1 | 3.6 |
| 4 | 4 | 58 | 12.3 | 4.7 | 1.6 | 4.0 | 40.0 | 0.5 | 1.7 | 38.7 | 1.1 | 1.3 | 76.9 | 0.2 | 0.6 | 0.9 | 1.5 | 0.5 | -0.4 | 0.9 |
# Exploratory data analysis: build a profiling report for the training frame
# and leave it as the cell's final expression so the notebook renders it.
# NOTE(review): pandas_profiling appears to have been renamed to
# ydata-profiling upstream — confirm which package the environment provides.
eda_report = ProfileReport(df_train, title="EDA of training data")
eda_report
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]